import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
# Load the Parkinson's voice-measurement dataset from a hardcoded local path.
df = pd.read_csv(r"C:\Users\Iddrisu Bachokun\Desktop\Python\Data\parkinsin\parkinsons.csv")
# Show all columns when displaying DataFrames (this dataset has 24 columns).
pd.set_option('display.max_columns',None)
# Preview the first five rows.
df.head()
| name | MDVP:Fo(Hz) | MDVP:Fhi(Hz) | MDVP:Flo(Hz) | MDVP:Jitter(%) | MDVP:Jitter(Abs) | MDVP:RAP | MDVP:PPQ | Jitter:DDP | MDVP:Shimmer | MDVP:Shimmer(dB) | Shimmer:APQ3 | Shimmer:APQ5 | MDVP:APQ | Shimmer:DDA | NHR | HNR | status | RPDE | DFA | spread1 | spread2 | D2 | PPE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | phon_R01_S01_1 | 119.992 | 157.302 | 74.997 | 0.00784 | 0.00007 | 0.00370 | 0.00554 | 0.01109 | 0.04374 | 0.426 | 0.02182 | 0.03130 | 0.02971 | 0.06545 | 0.02211 | 21.033 | 1 | 0.414783 | 0.815285 | -4.813031 | 0.266482 | 2.301442 | 0.284654 |
| 1 | phon_R01_S01_2 | 122.400 | 148.650 | 113.819 | 0.00968 | 0.00008 | 0.00465 | 0.00696 | 0.01394 | 0.06134 | 0.626 | 0.03134 | 0.04518 | 0.04368 | 0.09403 | 0.01929 | 19.085 | 1 | 0.458359 | 0.819521 | -4.075192 | 0.335590 | 2.486855 | 0.368674 |
| 2 | phon_R01_S01_3 | 116.682 | 131.111 | 111.555 | 0.01050 | 0.00009 | 0.00544 | 0.00781 | 0.01633 | 0.05233 | 0.482 | 0.02757 | 0.03858 | 0.03590 | 0.08270 | 0.01309 | 20.651 | 1 | 0.429895 | 0.825288 | -4.443179 | 0.311173 | 2.342259 | 0.332634 |
| 3 | phon_R01_S01_4 | 116.676 | 137.871 | 111.366 | 0.00997 | 0.00009 | 0.00502 | 0.00698 | 0.01505 | 0.05492 | 0.517 | 0.02924 | 0.04005 | 0.03772 | 0.08771 | 0.01353 | 20.644 | 1 | 0.434969 | 0.819235 | -4.117501 | 0.334147 | 2.405554 | 0.368975 |
| 4 | phon_R01_S01_5 | 116.014 | 141.781 | 110.655 | 0.01284 | 0.00011 | 0.00655 | 0.00908 | 0.01966 | 0.06425 | 0.584 | 0.03490 | 0.04825 | 0.04465 | 0.10470 | 0.01767 | 19.649 | 1 | 0.417356 | 0.823484 | -3.747787 | 0.234513 | 2.332180 | 0.410335 |
# Re-read the file keeping only 8 acoustic features plus the `status` label
# (1 = Parkinson's, 0 = healthy); this subset is used for modelling below.
df1 = pd.read_csv(r"C:\Users\Iddrisu Bachokun\Desktop\Python\Data\parkinsin\parkinsons.csv",usecols=["MDVP:Fo(Hz)","MDVP:Fhi(Hz)","MDVP:Flo(Hz)","MDVP:Shimmer(dB)",
"Shimmer:APQ3","Shimmer:APQ5","NHR","HNR","status"])
df1.head()
| MDVP:Fo(Hz) | MDVP:Fhi(Hz) | MDVP:Flo(Hz) | MDVP:Shimmer(dB) | Shimmer:APQ3 | Shimmer:APQ5 | NHR | HNR | status | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 119.992 | 157.302 | 74.997 | 0.426 | 0.02182 | 0.03130 | 0.02211 | 21.033 | 1 |
| 1 | 122.400 | 148.650 | 113.819 | 0.626 | 0.03134 | 0.04518 | 0.01929 | 19.085 | 1 |
| 2 | 116.682 | 131.111 | 111.555 | 0.482 | 0.02757 | 0.03858 | 0.01309 | 20.651 | 1 |
| 3 | 116.676 | 137.871 | 111.366 | 0.517 | 0.02924 | 0.04005 | 0.01353 | 20.644 | 1 |
| 4 | 116.014 | 141.781 | 110.655 | 0.584 | 0.03490 | 0.04825 | 0.01767 | 19.649 | 1 |
# Dtypes and non-null counts per column.
df1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 195 entries, 0 to 194 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 MDVP:Fo(Hz) 195 non-null float64 1 MDVP:Fhi(Hz) 195 non-null float64 2 MDVP:Flo(Hz) 195 non-null float64 3 MDVP:Shimmer(dB) 195 non-null float64 4 Shimmer:APQ3 195 non-null float64 5 Shimmer:APQ5 195 non-null float64 6 NHR 195 non-null float64 7 HNR 195 non-null float64 8 status 195 non-null int64 9 D2 195 non-null float64 10 PPE 195 non-null float64 dtypes: float64(10), int64(1) memory usage: 16.9 KB
# Summary statistics (count/mean/std/quartiles) for each numeric column.
df1.describe()
| MDVP:Fo(Hz) | MDVP:Fhi(Hz) | MDVP:Flo(Hz) | MDVP:Shimmer(dB) | Shimmer:APQ3 | Shimmer:APQ5 | NHR | HNR | status | |
|---|---|---|---|---|---|---|---|---|---|
| count | 195.000000 | 195.000000 | 195.000000 | 195.000000 | 195.000000 | 195.000000 | 195.000000 | 195.000000 | 195.000000 |
| mean | 154.228641 | 197.104918 | 116.324631 | 0.282251 | 0.015664 | 0.017878 | 0.024847 | 21.885974 | 0.753846 |
| std | 41.390065 | 91.491548 | 43.521413 | 0.194877 | 0.010153 | 0.012024 | 0.040418 | 4.425764 | 0.431878 |
| min | 88.333000 | 102.145000 | 65.476000 | 0.085000 | 0.004550 | 0.005700 | 0.000650 | 8.441000 | 0.000000 |
| 25% | 117.572000 | 134.862500 | 84.291000 | 0.148500 | 0.008245 | 0.009580 | 0.005925 | 19.198000 | 1.000000 |
| 50% | 148.790000 | 175.829000 | 104.315000 | 0.221000 | 0.012790 | 0.013470 | 0.011660 | 22.085000 | 1.000000 |
| 75% | 182.769000 | 224.205500 | 140.018500 | 0.350000 | 0.020265 | 0.022380 | 0.025640 | 25.075500 | 1.000000 |
| max | 260.105000 | 592.030000 | 239.170000 | 1.302000 | 0.056470 | 0.079400 | 0.314820 | 33.047000 | 1.000000 |
# Missing-value count per column (all zero per the output below).
df1.isna().sum()
MDVP:Fo(Hz) 0 MDVP:Fhi(Hz) 0 MDVP:Flo(Hz) 0 MDVP:Shimmer(dB) 0 Shimmer:APQ3 0 Shimmer:APQ5 0 NHR 0 HNR 0 status 0 dtype: int64
# Distinct label values: binary {1, 0}.
df1.status.unique()
array([1, 0], dtype=int64)
# Class balance: 147 positive vs 48 negative — imbalanced, hence oversampling later.
df1['status'].value_counts()
1 147 0 48 Name: status, dtype: int64
from imblearn.over_sampling import RandomOverSampler
# Shuffle the rows (sample(frac=1)) then split 60% / 20% / 20% into
# train / valid / test by index position.
# NOTE(review): no random_state is set, so the split differs on every run.
train , valid, test = np.split(df1.sample(frac=1), [int(0.6*len(df1)),int(0.8*len(df1))])
def scale_dataset(dataframe):
    """Standard-scale the feature columns of *dataframe*.

    Assumes the label is the LAST column. Returns ``(data, x, y)`` where
    ``x`` is the z-scored feature matrix, ``y`` the label vector, and
    ``data`` the two stacked side by side.
    """
    # Imported locally: at this point in the file StandardScaler has not yet
    # been imported (the top-level import only appears further down).
    from sklearn.preprocessing import StandardScaler
    # BUG FIX: DataFrame has no `.cols` attribute (raised AttributeError);
    # the correct attribute is `.columns`.
    x = dataframe[dataframe.columns[:-1]].values
    y = dataframe[dataframe.columns[-1]].values
    scaler = StandardScaler()
    x = scaler.fit_transform(x)
    data = np.hstack((x, np.reshape(y, (-1, 1))))
    return data, x, y
print(len(train[train['status']==1])) # Parkinson's-positive rows in the train split
print(len(train[train['status']==0])) # healthy rows in the train split
91 26
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import StandardScaler
def scale_dataset(dataframe, oversample=False):
    """Z-score the feature columns (all but the last) of *dataframe*.

    When *oversample* is True, balance the two classes with
    RandomOverSampler after scaling. Returns ``(data, x, y)``: the
    features and label stacked side by side, the scaled feature matrix,
    and the label vector.
    """
    features = dataframe[dataframe.columns[:-1]].values
    labels = dataframe[dataframe.columns[-1]].values
    features = StandardScaler().fit_transform(features)
    if oversample:
        features, labels = RandomOverSampler().fit_resample(features, labels)
    data = np.hstack((features, np.reshape(labels, (-1, 1))))
    return data, features, labels
# NOTE(review): exact duplicate of the scale_dataset definition directly
# above — redundant; only this later definition is in effect.
def scale_dataset(dataframe,oversample=False):
    """Z-score the feature columns (all but the last); optionally balance
    classes with RandomOverSampler. Returns (data, x, y)."""
    x = dataframe[dataframe.columns[:-1]].values
    y = dataframe[dataframe.columns[-1]].values
    scaler = StandardScaler()
    x = scaler.fit_transform(x)
    if oversample:
        ros = RandomOverSampler()
        x , y = ros.fit_resample(x,y)
    data = np.hstack((x,np.reshape(y,(-1,1))))
    return data, x,y
print(len(train[train['status']==1])) # Parkinson's-positive rows in the train split
print(len(train[train['status']==0])) # healthy rows in the train split
91 26
# Scale each split; oversample only the training split to balance classes.
# Note: `train`/`valid`/`test` are rebound from DataFrames to numpy arrays here.
# NOTE(review): the scaler is re-fit on every split, so valid/test are scaled
# with their own statistics rather than the training scaler — confirm intended.
train, x_train, y_train = scale_dataset(train, oversample=True)
valid, x_valid ,y_valid = scale_dataset(valid, oversample=False)
test, x_test, y_test = scale_dataset(test, oversample=False)
# Positive-class count after oversampling.
sum(y_train==1)
91
# Negative-class count — now equal to the positives, so classes are balanced.
sum(y_train==0)
91
# Overlay per-feature histograms of diseased vs healthy voices to eyeball
# which acoustic features separate the two classes.
for col in df1.columns[:-1]:
    diseased = df1[df1["status"]==1][col]
    healthy = df1[df1["status"]==0][col]
    plt.hist(diseased, color='blue', label='Disease', alpha=0.7, density=True)
    plt.hist(healthy, color='red', label='No Disease', alpha=0.7, density=True)
    plt.title(col)
    plt.ylabel("Probability")
    plt.xlabel(col)
    plt.legend()
    plt.show()
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
# k-nearest-neighbours baseline (default k=5) on the scaled, oversampled train split.
knn_model = KNeighborsClassifier()
knn_model.fit(x_train,y_train)
KNeighborsClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier()
# Evaluate KNN on the held-out test split.
y_pred = knn_model.predict(x_test)
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.53 0.73 0.62 11
1 0.88 0.75 0.81 28
accuracy 0.74 39
macro avg 0.70 0.74 0.71 39
weighted avg 0.78 0.74 0.75 39
# Single-sample sanity check: these are the 8 feature values of the first
# dataset row, whose true status is 1.
input_data = (119.992,157.302,74.997,0.426,0.02182,0.03130,0.02211,21.033)
input_data_np = np.asarray(input_data)
# Reshape to (1, 8): the model expects a 2-D batch.
input_data_re = input_data_np.reshape(1,-1)
# NOTE(review): the model was trained on StandardScaler-transformed features,
# but this raw input is NOT scaled — the prediction is unreliable (it outputs
# 0 for a known-positive row). The fitted scaler should be applied first.
pred = knn_model.predict(input_data_re)
print(pred)
if(pred[0]==0):
    print("The person has no disease")
else:
    print("The person has the disease")
[0] The person has no disease
from sklearn.naive_bayes import GaussianNB
# Gaussian Naive Bayes baseline.
nb_model = GaussianNB()
# BUG FIX: was `nb_model.fit(x_test, y_test)` — training on the test set
# leaks the evaluation data and makes the report below meaningless.
# Fit on the training split like every other model in this file.
nb_model.fit(x_train,y_train)
GaussianNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GaussianNB()
# Evaluate Naive Bayes on the test split.
y_pred = nb_model.predict(x_test)
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.41 0.82 0.55 11
1 0.88 0.54 0.67 28
accuracy 0.62 39
macro avg 0.65 0.68 0.61 39
weighted avg 0.75 0.62 0.63 39
from sklearn.linear_model import LogisticRegression
# Logistic-regression baseline on the scaled train split.
logistic_model = LogisticRegression()
logistic_model.fit(x_train,y_train)
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression()
# Evaluate logistic regression on the test split.
y_pred = logistic_model.predict(x_test)
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.41 0.82 0.55 11
1 0.88 0.54 0.67 28
accuracy 0.62 39
macro avg 0.65 0.68 0.61 39
weighted avg 0.75 0.62 0.63 39
from sklearn.svm import SVC
# Support-vector-machine baseline (default RBF kernel).
sv_model = SVC()
sv_model.fit(x_train,y_train)
SVC()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC()
# Evaluate the SVM on the test split.
y_pred = sv_model.predict(x_test)
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.44 0.73 0.55 11
1 0.86 0.64 0.73 28
accuracy 0.67 39
macro avg 0.65 0.69 0.64 39
weighted avg 0.74 0.67 0.68 39
from sklearn.tree import DecisionTreeClassifier
# Decision-tree baseline (unpruned defaults).
dt_model = DecisionTreeClassifier()
dt_model.fit(x_train,y_train)
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier()
# Evaluate the decision tree on the test split (best classical model here).
y_pred = dt_model.predict(x_test)
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.86 0.55 0.67 11
1 0.84 0.96 0.90 28
accuracy 0.85 39
macro avg 0.85 0.75 0.78 39
weighted avg 0.85 0.85 0.83 39
import tensorflow as tf
def plot_history(history):
    """Plot a Keras History's train/validation loss and accuracy curves
    side by side in one figure."""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
    ax1.plot(history.history['loss'], label='loss')
    ax1.plot(history.history['val_loss'], label='val_loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Binary_crossentropy')
    # FIX: both legend() calls were commented out even though every curve
    # sets a label — without them the two lines in each panel are
    # indistinguishable. Restore the legends.
    ax1.legend()
    ax1.grid(True)
    ax2.plot(history.history['accuracy'], label='accuracy')
    ax2.plot(history.history['val_accuracy'], label='val_accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.legend()
    ax2.grid(True)
    plt.show()
import tensorflow as tf
def train_model(X_train, y_train, num_nodes, dropout_prob, lr, batch_size, epochs):
    """Build, compile and fit a 2-hidden-layer MLP binary classifier.

    Args:
        X_train, y_train: training features (8 columns) and binary labels.
        num_nodes: width of each hidden Dense layer.
        dropout_prob: dropout rate applied after each hidden layer.
        lr: Adam learning rate.
        batch_size, epochs: fit() parameters; 20% of X_train is held out
            as the validation split.

    Returns:
        (model, history) — the fitted model and its Keras History.
    """
    nnw_model = tf.keras.Sequential([
        tf.keras.layers.Dense(num_nodes, activation='relu', input_shape=(8,)),
        tf.keras.layers.Dropout(dropout_prob),
        tf.keras.layers.Dense(num_nodes, activation='relu'),
        tf.keras.layers.Dropout(dropout_prob),
        tf.keras.layers.Dense(1, activation='sigmoid')])
    nnw_model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss='binary_crossentropy',
                      metrics=['accuracy'])
    # BUG FIX: was fitting on the global `x_train` instead of the X_train
    # parameter, silently ignoring whatever data the caller passed in.
    history = nnw_model.fit(
        X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=0
    )
    return nnw_model, history
# Grid-search over network width, dropout, learning rate and batch size,
# keeping the model with the lowest validation loss.
# BUG FIX: was `east_val_loss = list('inf')`, which builds ['i','n','f'];
# the intent was a float infinity sentinel named least_val_loss.
least_val_loss = float('inf')
least_loss_model = None
epochs = 100
for num_nodes in [16,32,64]:
    for dropout_prob in [0,0.2]:
        for lr in [0.01, 0.005, 0.001]:
            for batch_size in [32,64,128]:
                print(f"{num_nodes} nodes, dropout {dropout_prob},lr {lr},batch size {batch_size}")
                model , history = train_model(x_train, y_train, num_nodes, dropout_prob, lr, batch_size, epochs)
                plot_history(history)
                # evaluate() returns [loss, accuracy]; keep only the loss.
                val_loss = model.evaluate(x_valid, y_valid)[0]
                # BUG FIX: the comparison was commented out, so the loop kept
                # the LAST model trained instead of the best one.
                if val_loss < least_val_loss:
                    least_val_loss = val_loss
                    least_loss_model = model
16 nodes, dropout 0,lr 0.01,batch size 32
2/2 [==============================] - 0s 3ms/step - loss: 1.4168 - accuracy: 0.8205 16 nodes, dropout 0,lr 0.01,batch size 64
2/2 [==============================] - 0s 3ms/step - loss: 1.7528 - accuracy: 0.8205 16 nodes, dropout 0,lr 0.01,batch size 128
2/2 [==============================] - 0s 4ms/step - loss: 0.8667 - accuracy: 0.7949 16 nodes, dropout 0,lr 0.005,batch size 32
2/2 [==============================] - 0s 4ms/step - loss: 0.8602 - accuracy: 0.8205 16 nodes, dropout 0,lr 0.005,batch size 64
2/2 [==============================] - 0s 3ms/step - loss: 0.6783 - accuracy: 0.8462 16 nodes, dropout 0,lr 0.005,batch size 128
2/2 [==============================] - 0s 3ms/step - loss: 0.4338 - accuracy: 0.8462 16 nodes, dropout 0,lr 0.001,batch size 32
2/2 [==============================] - 0s 0s/step - loss: 0.4735 - accuracy: 0.7949 16 nodes, dropout 0,lr 0.001,batch size 64
2/2 [==============================] - 0s 3ms/step - loss: 0.3914 - accuracy: 0.7949 16 nodes, dropout 0,lr 0.001,batch size 128
2/2 [==============================] - 0s 0s/step - loss: 0.3944 - accuracy: 0.7949 16 nodes, dropout 0.2,lr 0.01,batch size 32
2/2 [==============================] - 0s 4ms/step - loss: 1.1207 - accuracy: 0.8462 16 nodes, dropout 0.2,lr 0.01,batch size 64
2/2 [==============================] - 0s 4ms/step - loss: 0.5186 - accuracy: 0.8462 16 nodes, dropout 0.2,lr 0.01,batch size 128
2/2 [==============================] - 0s 3ms/step - loss: 0.9787 - accuracy: 0.8718 16 nodes, dropout 0.2,lr 0.005,batch size 32
2/2 [==============================] - 0s 4ms/step - loss: 0.5754 - accuracy: 0.8462 16 nodes, dropout 0.2,lr 0.005,batch size 64
2/2 [==============================] - 0s 3ms/step - loss: 0.5434 - accuracy: 0.8462 16 nodes, dropout 0.2,lr 0.005,batch size 128
2/2 [==============================] - 0s 4ms/step - loss: 0.3699 - accuracy: 0.8205 16 nodes, dropout 0.2,lr 0.001,batch size 32
2/2 [==============================] - 0s 4ms/step - loss: 0.4184 - accuracy: 0.7949 16 nodes, dropout 0.2,lr 0.001,batch size 64
2/2 [==============================] - 0s 4ms/step - loss: 0.4187 - accuracy: 0.7692 16 nodes, dropout 0.2,lr 0.001,batch size 128
2/2 [==============================] - 0s 3ms/step - loss: 0.3923 - accuracy: 0.8205 32 nodes, dropout 0,lr 0.01,batch size 32
2/2 [==============================] - 0s 3ms/step - loss: 2.6784 - accuracy: 0.7949 32 nodes, dropout 0,lr 0.01,batch size 64
2/2 [==============================] - 0s 5ms/step - loss: 2.2335 - accuracy: 0.7949 32 nodes, dropout 0,lr 0.01,batch size 128
2/2 [==============================] - 0s 3ms/step - loss: 0.8360 - accuracy: 0.8462 32 nodes, dropout 0,lr 0.005,batch size 32
2/2 [==============================] - 0s 3ms/step - loss: 1.2914 - accuracy: 0.8205 32 nodes, dropout 0,lr 0.005,batch size 64
2/2 [==============================] - 0s 3ms/step - loss: 1.2933 - accuracy: 0.7949 32 nodes, dropout 0,lr 0.005,batch size 128
2/2 [==============================] - 0s 2ms/step - loss: 0.6945 - accuracy: 0.8205 32 nodes, dropout 0,lr 0.001,batch size 32
2/2 [==============================] - 0s 3ms/step - loss: 0.4411 - accuracy: 0.8205 32 nodes, dropout 0,lr 0.001,batch size 64
2/2 [==============================] - 0s 0s/step - loss: 0.4680 - accuracy: 0.8462 32 nodes, dropout 0,lr 0.001,batch size 128
2/2 [==============================] - 0s 16ms/step - loss: 0.4147 - accuracy: 0.7949 32 nodes, dropout 0.2,lr 0.01,batch size 32
2/2 [==============================] - 0s 3ms/step - loss: 1.7429 - accuracy: 0.8205 32 nodes, dropout 0.2,lr 0.01,batch size 64
2/2 [==============================] - 0s 13ms/step - loss: 0.6865 - accuracy: 0.8462 32 nodes, dropout 0.2,lr 0.01,batch size 128
2/2 [==============================] - 0s 0s/step - loss: 0.5627 - accuracy: 0.8462 32 nodes, dropout 0.2,lr 0.005,batch size 32
2/2 [==============================] - 0s 16ms/step - loss: 0.9494 - accuracy: 0.8462 32 nodes, dropout 0.2,lr 0.005,batch size 64
2/2 [==============================] - 0s 0s/step - loss: 0.9690 - accuracy: 0.8462 32 nodes, dropout 0.2,lr 0.005,batch size 128
2/2 [==============================] - 0s 0s/step - loss: 0.6138 - accuracy: 0.8205 32 nodes, dropout 0.2,lr 0.001,batch size 32
2/2 [==============================] - 0s 0s/step - loss: 0.4329 - accuracy: 0.7949 32 nodes, dropout 0.2,lr 0.001,batch size 64
2/2 [==============================] - 0s 0s/step - loss: 0.4305 - accuracy: 0.8462 32 nodes, dropout 0.2,lr 0.001,batch size 128
2/2 [==============================] - 0s 16ms/step - loss: 0.4066 - accuracy: 0.8205 64 nodes, dropout 0,lr 0.01,batch size 32
2/2 [==============================] - 0s 16ms/step - loss: 2.3440 - accuracy: 0.8205 64 nodes, dropout 0,lr 0.01,batch size 64
2/2 [==============================] - 0s 0s/step - loss: 2.7491 - accuracy: 0.7692 64 nodes, dropout 0,lr 0.01,batch size 128
2/2 [==============================] - 0s 0s/step - loss: 1.4621 - accuracy: 0.8205 64 nodes, dropout 0,lr 0.005,batch size 32
2/2 [==============================] - 0s 0s/step - loss: 2.4739 - accuracy: 0.8462 64 nodes, dropout 0,lr 0.005,batch size 64
2/2 [==============================] - 0s 0s/step - loss: 1.6316 - accuracy: 0.7949 64 nodes, dropout 0,lr 0.005,batch size 128
2/2 [==============================] - 0s 4ms/step - loss: 1.3957 - accuracy: 0.7949 64 nodes, dropout 0,lr 0.001,batch size 32
2/2 [==============================] - 0s 0s/step - loss: 0.7824 - accuracy: 0.8205 64 nodes, dropout 0,lr 0.001,batch size 64
2/2 [==============================] - 0s 16ms/step - loss: 0.4458 - accuracy: 0.8205 64 nodes, dropout 0,lr 0.001,batch size 128
2/2 [==============================] - 0s 0s/step - loss: 0.4083 - accuracy: 0.8205 64 nodes, dropout 0.2,lr 0.01,batch size 32
2/2 [==============================] - 0s 0s/step - loss: 2.1601 - accuracy: 0.8462 64 nodes, dropout 0.2,lr 0.01,batch size 64
2/2 [==============================] - 0s 0s/step - loss: 1.6248 - accuracy: 0.8205 64 nodes, dropout 0.2,lr 0.01,batch size 128
2/2 [==============================] - 0s 0s/step - loss: 1.1621 - accuracy: 0.8462 64 nodes, dropout 0.2,lr 0.005,batch size 32
2/2 [==============================] - 0s 0s/step - loss: 1.3952 - accuracy: 0.8205 64 nodes, dropout 0.2,lr 0.005,batch size 64
2/2 [==============================] - 0s 0s/step - loss: 1.0800 - accuracy: 0.8205 64 nodes, dropout 0.2,lr 0.005,batch size 128
2/2 [==============================] - 0s 0s/step - loss: 0.7528 - accuracy: 0.7949 64 nodes, dropout 0.2,lr 0.001,batch size 32
2/2 [==============================] - 0s 0s/step - loss: 0.4400 - accuracy: 0.8462 64 nodes, dropout 0.2,lr 0.001,batch size 64
2/2 [==============================] - 0s 0s/step - loss: 0.4205 - accuracy: 0.8718 64 nodes, dropout 0.2,lr 0.001,batch size 128
2/2 [==============================] - 0s 16ms/step - loss: 0.4396 - accuracy: 0.8462